rm(list=ls())
library("ocomposition") 

setwd("/home/andrei/Desktop/Dropbox/rolling elections/Bayes") 
n.sample=5000

bulk <- read.csv("mydata.csv")
bulk <- bulk[c("vs_viab","vs_nviab","hlvote","vs_viab_lag","vs_nviab_lag","thedate","ststart","stend","state","partymark",
               "medianhha","newstv","newspaper","illiterate","graduate","enrel","enlang","scres","stres","year","yr1","yr2",paste("vshare",1:42,sep=""),"mark")] 
nrow(bulk)
days <- bulk[["thedate"]]-bulk[["ststart"]]
bulk <- bulk[which(days<11),]
days <- days[which(days<11)]
nrow(bulk)

v <- bulk[paste("vshare",1:42,sep="")]
n <- bulk[["mark"]]
later <- as.numeric(days>=5)
dummies <- as.matrix(bulk[c("scres","stres","yr1","yr2")])
zvar <- as.matrix(bulk[c("partymark","medianhha","newstv","newspaper","illiterate","graduate","enrel","enlang")])
for (i in 1:ncol(zvar)){
  zvar[,i] <- (zvar[,i]-mean(zvar[,i],na.rm=TRUE))/sd(zvar[,i],na.rm=TRUE)
}

zvarx <- cbind(later,days,zvar,dummies,n)
# casewise deletion
nmiss <- rowSums(is.na(zvarx[,c("partymark","medianhha","newstv","newspaper","illiterate","graduate","enrel","enlang")]))==0
indvar <- zvarx[nmiss,]
vx <- v[nmiss,]

# estimation
out=fitcomp(data.v=vx, data.x=indvar, n.formula= n ~ later + medianhha + newstv + newspaper + illiterate + graduate + enrel + enlang + yr1 + yr2,
    v.formula= ~ later + medianhha + newstv + newspaper + illiterate + graduate + enrel + enlang + log(n), n.sample=n.sample,burn=7000,thin=5)

save(out,file="estimates ORCOMP1.RData")

###########################   post-estimation #################################################
##  diagnostics
load("estimates ORCOMP1.RData")
xyz <- apply(out[["g"]],3L,c)
zzz <- mcmc(data=t(xyz), start = 1, end = n.sample, thin = 5)
test <- heidel.diag(zzz)

## predictions

xyz = summary(out)
print(xtable(xyz[[1]][,c(1,3,4)], type = "latex", digits=3), file = "ORCOMP later-1.tex")
print(xtable(xyz[[2]][,c(1,3,4)], type = "latex", digits=3), file = "ORCOMP later-2.tex")

data.means <- colMeans(zvar[,c("medianhha","newstv","newspaper","illiterate","graduate","enrel","enlang")])
fake <- data.frame(later=c(0,1),n=10,yr1=0,yr2=0)
fake <- cbind(fake,t(data.means))
v.hat <- as.list(1:2)
sum.v.hat <- as.list(1:2)
for (i in 1:2) {
v.hat[[i]] <- predict(out,fake[i,])
sum.v.hat[[i]] <- colMeans(v.hat[[i]])
}

summary(v.hat[[1]][,2]-v.hat[[1]][,3])
quantile(v.hat[[1]][,2]-v.hat[[1]][,3],prob=c(0.025,0.975)) 
summary(v.hat[[2]][,2]-v.hat[[2]][,3])
quantile(v.hat[[2]][,2]-v.hat[[2]][,3],prob=c(0.025,0.975))
 
setEPS(width=11,height=6)
postscript("ocomp1.eps") 
par(mfrow=c(1,2))
barplot(sum.v.hat[[1]][1:10], main="Earlier Phases", ylim=c(0,0.4), names.arg=(1:10), xlab="Candidate rank", cex.lab=1.5, cex.axis=1.2, cex.names=1.2)
grid(nx=NA,ny=NULL,col="gray20")
barplot(sum.v.hat[[2]][1:10], main="Later Phases", ylim=c(0,0.4), names.arg=(1:10), xlab="Candidate rank", cex.lab=1.5, cex.axis=1.2, cex.names=1.2)
grid(nx=NA,ny=NULL,col="gray20")
dev.off()